This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Load the necessary library
library(MASS)
library(ggplot2)
library(plotly)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:MASS’:
select
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
# Load a dataset
data(mtcars)
# Fit a linear model
model <- lm(mpg ~ wt + hp, data = mtcars)
summary(model)
Call:
lm(formula = mpg ~ wt + hp, data = mtcars)
Residuals:
Min 1Q Median 3Q Max
-3.941 -1.600 -0.182 1.050 5.854
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 37.22727 1.59879 23.285 < 2e-16 ***
wt -3.87783 0.63273 -6.129 1.12e-06 ***
hp -0.03177 0.00903 -3.519 0.00145 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.593 on 29 degrees of freedom
Multiple R-squared: 0.8268, Adjusted R-squared: 0.8148
F-statistic: 69.21 on 2 and 29 DF, p-value: 9.109e-12
# Summarize the model
# Plot the data and the fitted model
ggplot(mtcars, aes(x = wt, y = mpg)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x) +
labs(title = "Linear Regression of MPG on Weight and Horsepower",
x = "Weight (1000 lbs)",
y = "Miles per Gallon (MPG)")
patientID = 1:1000
myMean = c(47,160)
myCov = matrix(c(40, 35, 35, 100), nrow=2)
myData = mvrnorm(n=1000, mu=myMean, Sigma = myCov)
colnames(myData) = c("patientAge", "patientWeight")
patientResponse = 0.6*myData[,"patientAge"] - 0.2*myData[,"patientWeight"]+runif(1000, min=-1, max=5)
patientData = data.frame(patientID, myData, patientResponse)
summary(patientData)
patientID patientAge patientWeight patientResponse
Min. : 1.0 Min. :28.97 Min. :124.6 Min. :-11.8459
1st Qu.: 250.8 1st Qu.:42.75 1st Qu.:153.2 1st Qu.: -4.0399
Median : 500.5 Median :47.27 Median :160.5 Median : -1.6391
Mean : 500.5 Mean :47.34 Mean :160.3 Mean : -1.6503
3rd Qu.: 750.2 3rd Qu.:51.67 3rd Qu.:167.1 3rd Qu.: 0.7042
Max. :1000.0 Max. :65.01 Max. :190.0 Max. : 10.2958
pairs(patientData)
hist(patientResponse)
myPlot = plot_ly(patientData, type="scatter3d", mode="markers",
x=~patientAge, y=~patientWeight, z=~patientResponse, size=I(10))
myPlot = myPlot %>% layout(title = 'Patient Response')
myPlot
NA
model2 = lm(patientResponse ~ patientAge + patientWeight, data = patientData)
summary(model2)
Call:
lm(formula = patientResponse ~ patientAge + patientWeight, data = patientData)
Residuals:
Min 1Q Median 3Q Max
-3.0737 -1.5143 -0.0195 1.4782 3.1239
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.948634 0.859927 1.103 0.27
patientAge 0.595360 0.010719 55.544 <2e-16 ***
patientWeight -0.192079 0.006626 -28.989 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.715 on 997 degrees of freedom
Multiple R-squared: 0.7576, Adjusted R-squared: 0.7571
F-statistic: 1558 on 2 and 997 DF, p-value: < 2.2e-16
hist(model2$residual)
myPlot %>%
add_trace(patientData, x=~patientAge, y=~patientWeight, z=model2$fitted.values, type="mesh3d" )
Warning: 'mesh3d' objects don't have these attributes: 'mode'
Valid attributes include:
'alphahull', 'autocolorscale', 'cauto', 'cmax', 'cmid', 'cmin', 'color', 'coloraxis', 'colorbar', 'colorscale', 'contour', 'customdata', 'customdatasrc', 'delaunayaxis', 'facecolor', 'facecolorsrc', 'flatshading', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'i', 'ids', 'idssrc', 'intensity', 'intensitymode', 'intensitysrc', 'isrc', 'j', 'jsrc', 'k', 'ksrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'lighting', 'lightposition', 'meta', 'metasrc', 'name', 'opacity', 'reversescale', 'scene', 'showlegend', 'showscale', 'stream', 'text', 'textsrc', 'type', 'uid', 'uirevision', 'vertexcolor', 'vertexcolorsrc', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
Warning: 'mesh3d' objects don't have these attributes: 'mode'
Valid attributes include:
'alphahull', 'autocolorscale', 'cauto', 'cmax', 'cmid', 'cmin', 'color', 'coloraxis', 'colorbar', 'colorscale', 'contour', 'customdata', 'customdatasrc', 'delaunayaxis', 'facecolor', 'facecolorsrc', 'flatshading', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'i', 'ids', 'idssrc', 'intensity', 'intensitymode', 'intensitysrc', 'isrc', 'j', 'jsrc', 'k', 'ksrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'lighting', 'lightposition', 'meta', 'metasrc', 'name', 'opacity', 'reversescale', 'scene', 'showlegend', 'showscale', 'stream', 'text', 'textsrc', 'type', 'uid', 'uirevision', 'vertexcolor', 'vertexcolorsrc', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
patientData$patientResponse = 0.3*(myData[,"patientAge"]-47)^2 - 0.5*(myData[,"patientWeight"]-160)^2 + 45 + rnorm(1000, mean=0, sd=50)
summary(patientData)
patientID patientAge patientWeight patientResponse
Min. : 1.0 Min. :28.97 Min. :124.6 Min. :-646.108
1st Qu.: 250.8 1st Qu.:42.75 1st Qu.:153.2 1st Qu.: -36.791
Median : 500.5 Median :47.27 Median :160.5 Median : 17.527
Mean : 500.5 Mean :47.34 Mean :160.3 Mean : 6.383
3rd Qu.: 750.2 3rd Qu.:51.67 3rd Qu.:167.1 3rd Qu.: 64.757
Max. :1000.0 Max. :65.01 Max. :190.0 Max. : 208.893
pairs(patientData)
myPlot = plot_ly(patientData, type="scatter3d", mode="markers",
x=~patientAge, y=~patientWeight, z=~patientResponse, size=I(10))
myPlot = myPlot %>% layout(title = 'Patient Response')
myPlot
NA
#Try a polynomial of first order in the predictors
myGLM1 = glm(patientResponse ~ patientAge + patientWeight, data = patientData)
summary(myGLM1)
Call:
glm(formula = patientResponse ~ patientAge + patientWeight, data = patientData)
Deviance Residuals:
Min 1Q Median 3Q Max
-660.39 -42.50 11.43 59.33 207.04
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 40.0520 43.8541 0.913 0.361
patientAge 0.5203 0.5466 0.952 0.341
patientWeight -0.3638 0.3379 -1.077 0.282
(Dispersion parameter for gaussian family taken to be 7652.618)
Null deviance: 7639638 on 999 degrees of freedom
Residual deviance: 7629660 on 997 degrees of freedom
AIC: 11786
Number of Fisher Scoring iterations: 2
hist(myGLM1$residuals)
#Can you a density plot instead
#plot(density(myGLM$residuals))
confint(myGLM1)
Waiting for profiling to be done...
2.5 % 97.5 %
(Intercept) -45.9004223 126.0044402
patientAge -0.5510902 1.5916656
patientWeight -1.0260403 0.2985071
plot(patientData$patientResponse, myGLM1$fitted.values)
# myPlot %>%
# add_trace(patientData, x=~patientAge, y=~patientWeight, z=myGLM$fitted.values, type="mesh3d" )
#Try a polynomial of first order in the predictors with the cross term
#Try a polynomial of second order in the predictors without the cross term
myGLM3 = glm(patientResponse ~ I(patientAge^2) + I(patientWeight^2) + patientAge * patientWeight, data = patientData)
summary(myGLM3)
Call:
glm(formula = patientResponse ~ I(patientAge^2) + I(patientWeight^2) +
patientAge * patientWeight, data = patientData)
Deviance Residuals:
Min 1Q Median 3Q Max
-164.584 -33.221 0.119 33.877 166.910
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.202e+04 2.830e+02 -42.497 < 2e-16 ***
I(patientAge^2) 3.061e-01 4.474e-02 6.842 1.36e-11 ***
I(patientWeight^2) -4.973e-01 1.687e-02 -29.482 < 2e-16 ***
patientAge -2.809e+01 5.279e+00 -5.321 1.28e-07 ***
patientWeight 1.592e+02 4.110e+00 38.746 < 2e-16 ***
patientAge:patientWeight -4.241e-03 4.547e-02 -0.093 0.926
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 2625.795)
Null deviance: 7639638 on 999 degrees of freedom
Residual deviance: 2610040 on 994 degrees of freedom
AIC: 10719
Number of Fisher Scoring iterations: 2
hist(myGLM3$residuals)
myPlot %>%
add_trace(patientData, x=~patientAge, y=~patientWeight, z=myGLM3$fitted.values, type="mesh3d" )
Warning: 'mesh3d' objects don't have these attributes: 'mode'
Valid attributes include:
'alphahull', 'autocolorscale', 'cauto', 'cmax', 'cmid', 'cmin', 'color', 'coloraxis', 'colorbar', 'colorscale', 'contour', 'customdata', 'customdatasrc', 'delaunayaxis', 'facecolor', 'facecolorsrc', 'flatshading', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'i', 'ids', 'idssrc', 'intensity', 'intensitymode', 'intensitysrc', 'isrc', 'j', 'jsrc', 'k', 'ksrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'lighting', 'lightposition', 'meta', 'metasrc', 'name', 'opacity', 'reversescale', 'scene', 'showlegend', 'showscale', 'stream', 'text', 'textsrc', 'type', 'uid', 'uirevision', 'vertexcolor', 'vertexcolorsrc', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
Warning: 'mesh3d' objects don't have these attributes: 'mode'
Valid attributes include:
'alphahull', 'autocolorscale', 'cauto', 'cmax', 'cmid', 'cmin', 'color', 'coloraxis', 'colorbar', 'colorscale', 'contour', 'customdata', 'customdatasrc', 'delaunayaxis', 'facecolor', 'facecolorsrc', 'flatshading', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'i', 'ids', 'idssrc', 'intensity', 'intensitymode', 'intensitysrc', 'isrc', 'j', 'jsrc', 'k', 'ksrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'lighting', 'lightposition', 'meta', 'metasrc', 'name', 'opacity', 'reversescale', 'scene', 'showlegend', 'showscale', 'stream', 'text', 'textsrc', 'type', 'uid', 'uirevision', 'vertexcolor', 'vertexcolorsrc', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
plot(patientData$patientResponse, myGLM3$fitted.values)
#Generating data for predictions where we do not have the patient responses and would like to generate
#them using our current model
patientID = 1:500
myMean = c(47,160)
mySigma = matrix(c(40, 35, 35, 100), nrow=2)
myData = mvrnorm(n=500, mu=myMean, Sigma = mySigma)
colnames(myData) = c("patientAge", "patientWeight")
newPatientData = data.frame(myData)
newPatientResponse = predict(myGLM3, newdata = newPatientData)